params
## $genomefasta
## [1] "reference/Schizosaccharomyces_pombe.ASM294v2.dna.toplevel.fa"
##
## $gtf
## [1] "reference/Schizosaccharomyces_pombe.ASM294v2.55.gtf"
##
## $chiptrackfile
## [1] "data/genome_browser_tracks_chipseq.csv.gz"
##
## $peakcsv
## [1] "data/fused_peaks_filtered.csv.gz"
##
## $peakenr
## [1] "data/fused_peaks_filtered_enrichments.csv.gz"
##
## $peakatacfile
## [1] "data/fused_peaks_filtered_counts-external_atac.csv.gz"
##
## $peakchipfile
## [1] "data/fused_peaks_filtered_counts-external_chip.csv.gz"
##
## $dbdtxt
## [1] "data/TF_DBD_annotation.txt"
##
## $motifrds
## [1] "data/motifs_annotated.rds"
##
## $txcsv
## [1] "data/promoters-1kb_tes-1kb_annotated.csv.gz"
##
## $ipms150untag
## [1] "data/ipms_150_untag_sce.rds"
suppressPackageStartupMessages({
library(circlize)
library(ComplexHeatmap)
library(grid)
library(ggplot2)
library(cowplot)
library(Biostrings)
library(BSgenome)
library(GenomicRanges)
library(rtracklayer)
library(tibble)
library(tidyr)
library(universalmotif)
library(parallel)
library(colorspace)
library(scattermore)
library(ggupset)
library(dplyr)
library(viridisLite)
library(SummarizedExperiment)
library(S4Vectors)
library(jsonlite)
library(dendextend)
library(kableExtra)
library(forcats)
})
source("params/plot_settings.R")
source("params/get_testres_function.R")
source("params/mapping_functions.R")
source("params/plottracks_function.R")
# define heatmap dimensions and font sizes
w_peaksHm <- 84 # heatmap body width (mm)
h_peaksHm <- 178 # total heatmap height (mm)
fs <- 8 # small font size
fm <- 9 # medium font size
fl <- 10 # large font size
Here we define a few helper functions for using below:
# make first letter of a string lowercase
.lowercase <- function(x) {
paste0(tolower(substr(x, 1, 1)), substr(x, 2, nchar(x)))
}
# for each region in `from`, calculate the distance to its nearest element in `to`
# if there are no element in `to` on the sequence of `from`, set distance to NA
calc_distance_to_nearest <- function(from, to) {
stopifnot(exprs = {
is(from, "GRanges")
is(to, "GRanges")
})
tmp <- distanceToNearest(x = from, subject = to)
d <- rep(NA, length(from))
d[queryHits(tmp)] <- mcols(tmp)$distance
return(d)
}
We start by reading all tables and objects needed for this figure. Count tables are in addition normalized, and we also calculate averages over replicates or all samples where needed:
# genome sequence
gnm <- readDNAStringSet(params$genomefasta)
names(gnm) <- sub(" .+$", "", names(gnm))
# `peakgr`: ChIP-seq peaks (as a GRanges object)
peakgr <- as(read.csv(params$peakcsv, row.names = 1), "GRanges")
peakgr$peaktype <- factor(peakgr$peaktype,
levels = c("common peaks (ubiquitous)",
"common peaks (frequent)",
"specific peaks"))
peakseq <- getSeq(gnm, peakgr)
# `peakenr`: peak IP-enrichments log2(IP/input)
peakenr <- as.matrix(read.csv(params$peakenr, row.names = 1))
# ... average replicates (`peakenrAvg`)
grp <- sub("_rep[12]$", "", colnames(peakenr))
peakenrAvg <- do.call(cbind, lapply(split(colnames(peakenr), grp)[unique(grp)],
function(j) {
rowMeans(peakenr[, j, drop = FALSE])
}))
# `chip_genome_tab`: data.frame with alignment densities in selected
# genomic regions for selected samples
chip_genome_tab <- read.csv(params$chiptrackfile)
# `atac_raw`: public ATAC-seq read counts in ChIP-seq peaks from this study
atac_raw <- as.matrix(read.csv(params$peakatacfile, row.names = 1))
# ... calculate reads per million (`atac_cpm`) and
# per kilobase and million (`atac_rpkm`)
# and average experiments (`atac_cpm_avg`, `atac_rpkm_avg`)
atac_cpm <- sweep(x = atac_raw[, -1], MARGIN = 2,
STATS = colSums(atac_raw[, -1]), FUN = "/") * 1e6
atac_rpkm <- atac_cpm / atac_raw[, "width"] * 1e3
atac_cpm_avg <- rowMeans(atac_cpm)
atac_rpkm_avg <- rowMeans(atac_rpkm)
# `chip_raw`: public ChIP-seq read counts in ChIP-seq peaks from this study
chip_raw <- as.matrix(read.csv(params$peakchipfile, row.names = 1))
# ... calculate reads per million (`chip_cpm`) and
# per kilobase and million (`chip_rpkm`)
# and calculate enrichments (`chip_enr`, log2 IP/input) averaged over replicates
chip_cpm <- sweep(x = chip_raw[, -1], MARGIN = 2,
STATS = colSums(chip_raw[, -1]), FUN = "/") * 1e6
chip_rpkm <- chip_cpm / chip_raw[, "width"] * 1e3
chip_series <- sub("^ChIP_([^_]+)_.+$", "\\1", colnames(chip_cpm))
chip_enr <- do.call(
cbind,
lapply(split(colnames(chip_cpm), chip_series),
function(nms) {
grp <- sub("ChIP_(GSE[0-9]+)_GSM[0-9]+_(.+)_rep[0-9]$",
"\\1_\\2", nms)
stopifnot(any(is_input <- grep("_Input", grp)))
enr <- do.call(
cbind,
lapply(unique(grp[-is_input]), function(grp1) {
rowMeans(log2(chip_cpm[, nms[grp == grp1], drop = FALSE] + 1)) -
rowMeans(log2(chip_cpm[, nms[is_input], drop = FALSE] + 1))
}))
colnames(enr) <- unique(grp[-is_input])
enr
}))
# `genegr`: chromosomal ranges of genes (as GRanges object)
genegr <- rtracklayer::import(params$gtf, feature.type = "gene")
# `motifs`: data.frame with annotated motifs
motifs <- readRDS(params$motifrds)
# `txannot`: data.frame with transcript start site (TSS) and
# transcript end site (TES) annotation
txannot <- read.csv(params$txcsv, row.names = 1)
# `dbd`: data.frame with information of transcription factors used as
# IP-MS baits and their DNA binding domains
dbd <- read.delim(params$dbdtxt)
# Percent of TF-bound gene promoters
round(100 * mean(txannot$prom.number.overlapping.highconf.peaks > 0), 0)
## [1] 32
# Percent of TF-bound gene promoters, by gene biotype
txannot |>
group_by(gene_biotype) |>
summarise("Number of genes" = n(),
"Percent promoters with TF enrichments" = round(
100 * mean(prom.number.overlapping.highconf.peaks > 0), 0),
.groups = "drop") |>
knitr::kable()
| gene_biotype | Number of genes | Percent promoters with TF enrichments |
|---|---|---|
| RNase_MRP_RNA | 1 | 100 |
| RNase_P_RNA | 1 | 100 |
| SRP_RNA | 1 | 100 |
| ncRNA | 1534 | 36 |
| protein_coding | 5146 | 27 |
| pseudogene | 29 | 28 |
| rRNA | 88 | 78 |
| snRNA | 9 | 100 |
| snoRNA | 81 | 73 |
| tRNA | 379 | 65 |
# TF enrichments at promoters of TF genes
txannot |>
filter(gene_id %in% dbd$PomBaseID) |>
summarise("Number of TF genes" = n(),
"Number of TF promoters with at least one enriched TF" =
sum(prom.number.distinct.enriched.TFs > 0),
"Number of TF promoters with multiple enriched TFs" =
sum(prom.number.distinct.enriched.TFs > 1),
"Number of TF promoters without detected TF enrichment" =
sum(prom.number.distinct.enriched.TFs == 0),
"Number of TFs enriched at any promoter" = length(unique(unlist(strsplit(prom.enriched.TFs, ";")))),
"Number of TFs enriched at any promoter (w/o zas1\U207A promoter)" = length(unique(unlist(strsplit(prom.enriched.TFs[gene_name != "zas1"], ";")))),
"Number of TFs enriched at zas1\U207A promoter" = length(unique(unlist(strsplit(prom.enriched.TFs[gene_name == "zas1"], ";")))),
"Number of TFs enriched at their own promoter" = sum(binds.own.promoter),
"Number of TFs enriched at atf1\U207A promoter" = length(unique(unlist(strsplit(prom.enriched.TFs[gene_name == "atf1"], ";")))),
"Number of TF promoters with Atf1 enrichment" = length(grep("Atf1", prom.enriched.TFs)),
.groups = "drop") |>
pivot_longer(cols = starts_with("Number"),
values_to = "Number", names_to = "Set") |>
knitr::kable()
| Set | Number |
|---|---|
| Number of TF genes | 89 |
| Number of TF promoters with at least one enriched TF | 43 |
| Number of TF promoters with multiple enriched TFs | 26 |
| Number of TF promoters without detected TF enrichment | 46 |
| Number of TFs enriched at any promoter | 59 |
| Number of TFs enriched at any promoter (w/o zas1⁺ promoter) | 48 |
| Number of TFs enriched at zas1⁺ promoter | 48 |
| Number of TFs enriched at their own promoter | 11 |
| Number of TFs enriched at atf1⁺ promoter | 11 |
| Number of TF promoters with Atf1 enrichment | 11 |
# Total number of identified DNA-binding motifs
nrow(motifs)
## [1] 67
# Number of TFs for which at least one motif was identified
length(unique(motifs$tf_name))
## [1] 38
# Number (percent) of common frequent peaks with at least on Atf1.m1 DNA-binding motif (CRE motif)
hits <- universalmotif::scan_sequences(motifs = motifs$motif[motifs$name == "Atf1.m1"],
sequences = peakseq[peakgr$peaktype == "common peaks (frequent)"],
threshold = 1e-4,
threshold.type = "pvalue",
RC = TRUE,
verbose = 0,
nthreads = 1,
return.granges = TRUE)
length(unique(hits$sequence.i))
## [1] 33
round(100 * length(unique(hits$sequence.i)) / sum(peakgr$peaktype == "common peaks (frequent)"))
## [1] 35
TFexplorerStarting from the peaks (peakgr) and ChIP enrichments in peaks (peakenr), prepare and export the matrix in json format for use in TFexplorer.
# average replicate ChIP enrichments
grp <- sub("_rep[12]$", "", colnames(peakenr))
peakenrExport <- do.call(cbind,
lapply(structure(unique(grp), names = unique(grp)),
function(nm) {
rowMeans(peakenr[, grp == nm])
}))
# replace all enrichment values below 0 with 0
peakenrExport[peakenrExport < 0] <- 0
# cluster
dist_mat2 <- dist(peakenrExport, method = "euclidian")
hclust_avg2 <- hclust(dist_mat2, method = "average")
peakenrExportClustered <- peakenrExport[hclust_avg2$order, ]
# calculate extend peak coordinates and IGV position
IGVpos <- paste0(seqnames(peakgr), ":",
start(peakgr) - 100, "-",
end(peakgr) + 100)
IGVposClustered <- IGVpos[hclust_avg2$order]
# create peak position strings
peakPos <- paste0(seqnames(peakgr), ":", start(peakgr), "-", end(peakgr))
peakPosClustered <- peakPos[hclust_avg2$order]
# export peaks and enrichments as json
# ... calculate maximum enrichment value in each row
ChipMax <- apply(peakenrExportClustered, 1, max)
range(ChipMax)
## [1] 0.000000 6.719137
# ... convert to JSON
heatmapDataChip <- toJSON(
list(
heatmapMatrix = peakenrExportClustered,
TFs = colnames(peakenrExportClustered),
peaks = rownames(peakenrExportClustered),
peakPos = peakPosClustered,
IGVpos = IGVposClustered,
ChipMax = ChipMax
), pretty = TRUE)
# ... write into a file for TFexplorer
write(heatmapDataChip, "data/heatmapDataChIP.json")
We want to identify ChIP-seq peaks that are near a tRNA or rRNA gene. We can obtain the coordinates of these genes from genegr and the coordinates of peaks from peakgr, and calculate the distance between nearest peak-gene pairs using distanceToNearest(). Any pair with a distance below maxdist will be classified as “near” in the plots below.
Remark: Nearest distances can be NA in cases where for example a peak resides on a sequence (chromosome) that does not contain any tRNA or rRNA gene, or vice versa, such as the sequence AB325691 (which contains gap-filling sequence between SPBPB21E7.09 and SPBPB10D8.01 in chromosome 2) or the mitochondrial sequence MT.
# distance less than `maxdist` are defined as "near"
maxdist <- 100
# tRNA
# ... PomBase and Ensembl_Fungi annotate 196 and 183 tRNAs, respectively
# we combine the two and obtain 198 annotated tRNAs
table(genegr$gene_biotype == "tRNA", genegr$source)
##
## PomBase Ensembl_Fungi
## FALSE 6818 71
## TRUE 196 183
is_tRNA_pombase <- genegr$source == "PomBase" & genegr$gene_biotype == "tRNA"
is_tRNA_ensembl <- genegr$source == "Ensembl_Fungi" & genegr$gene_biotype == "tRNA"
is_tRNA <- is_tRNA_pombase | (is_tRNA_ensembl & !genegr %in% genegr[is_tRNA_pombase])
sum(is_tRNA)
## [1] 198
# ... now we measure distances between nearest pairs
# either from peak to nearest tRNA
dist.peak2tRNA <- calc_distance_to_nearest(from = peakgr, to = genegr[is_tRNA])
# or from tRNA to nearest peak
dist.tRNA2peak <- calc_distance_to_nearest(from = genegr[is_tRNA], to = peakgr)
# rRNA
# ... all 36 5S_rRNA genes in our annotation stem from Ensembl_Fungi
table(genegr$gene_name == "5S_rRNA", genegr$source)
##
## PomBase Ensembl_Fungi
## FALSE 4524 218
## TRUE 0 36
is_rRNA <- !is.na(genegr$gene_name) & genegr$gene_name == "5S_rRNA"
sum(is_rRNA)
## [1] 36
# ... now we measure distances between nearest pairs
# either from peak to nearest rRNA
dist.peak2rRNA <- calc_distance_to_nearest(from = peakgr, to = genegr[is_rRNA])
# or from rRNA to nearest peak
dist.rRNA2peak <- calc_distance_to_nearest(from = genegr[is_rRNA], to = peakgr)
We calculate the similarities between any pair of motifs using the compare_motifs() function with method = "PCC" (Pearson correlation coefficient). We allow for reverse-complements (tryRC = TRUE) and require a minimal overlap between the two motifs of four nucleotides (min.overlap = 4).
motifsim <- compare_motifs(motifs = motifs$motif,
method = "PCC", tryRC = TRUE,
min.overlap = 4, min.mean.ic = 0.25,
normalise.scores = TRUE)
For each motif, we visualize the fraction of sequence hits (sites in the genome that score highly for the motif) which are bound by the corresponding transcription factor (overlap with an enriched peak of that transcription factor). Fractions are shown separately for each peak class.
pd <- motifs |>
select(name, total_hits, starts_with("bound_hits")) |>
mutate(frac_bound_hits_any = (bound_hits_common.peaks.ubiquitous +
bound_hits_common.peaks.frequent + bound_hits_specific.peaks) / total_hits,
`Common (ubiquitous)` = bound_hits_common.peaks.ubiquitous / total_hits,
`Common (frequent)` = bound_hits_common.peaks.frequent / total_hits,
`Specific` = bound_hits_specific.peaks / total_hits) |>
# arrange(desc(frac_bound_hits_any)) |>
arrange(frac_bound_hits_any) |>
mutate(name = factor(name, levels = name)) |>
pivot_longer(cols = c("Common (ubiquitous)", "Common (frequent)",
"Specific"), names_to = "peak_set", values_to = "fraction_bound") |>
mutate(peak_set = factor(peak_set, levels = c("Common (ubiquitous)", "Common (frequent)",
"Specific")))
(p_fracbound <- ggplot(pd, aes(fraction_bound, name, fill = peak_set)) +
geom_col() +
scale_x_continuous(expand = expansion(mult = 0),
labels = scales::label_percent()) +
scale_fill_manual(values = peakset_colors) +
labs(x = "Percent of motif sites",
y = element_blank(),
fill = element_blank()) +
theme_cowplot(12) +
theme(legend.position = "inside",
legend.position.inside = c(0.95, 0.05),
legend.justification.inside = c(1, 0),
axis.text.y = element_text(angle = 0, hjust = 1, size = 8)) +
guides(fill = guide_legend(ncol = 1)))
The following creates heatmap of peaks (rows) versus ChIP-seq experiments (columns). The values are binary (TRUE or FALSE) and indicate if a peak was enriched in ChIP-seq experiment (had an average log2 IP/input enrichment values greater than 1.0).
# extract binary enrichment matrix from `peakgr`
m <- as.matrix(mcols(peakgr)[, grep("^is_enr_in[.]", colnames(mcols(peakgr)))])
mode(m) <- "numeric"
colnames(m) <- sub("is_enr_in.", "", colnames(m))
# specify ChIP-seq experiments for which to show labels
selLabels <- c("Untagged")
stopifnot(all(selLabels %in% colnames(m)))
# only show peaks that are enriched in at least one ChIP-seq experiment
sel_peaksBin <- rowSums(m) > 0
# prepare annotation data
# `fractTFs`: fraction of ChIP-seq experiments that a peak is enriched in
# `numpeaks`: the number of enriched peaks for each ChIP-seq experiment
# `peaktype`: the type of each peak (specific, frequent or ubiquitous)
# `near_ncRNA`: specifies if the peak is nearer than `maxdist` from
# a %S rRNA or tRNA gene
fracTFs <- rowMeans(m > 0)
numpeaks <- colSums(m > 0)
levels(peakgr$peaktype) <- c("Common (ubiquitous)", # use shorter names
"Common (frequent)",
"Specific peaks")
peaktype <- factor(gsub("[()]", "", sub(" ", "\n", peakgr$peaktype)),
levels = c("Common\nubiquitous",
"Common\nfrequent",
"Specific\npeaks"))
near_ncRNA <- factor(ifelse(!is.na(dist.peak2tRNA) & dist.peak2tRNA < maxdist,
"tRNA",
ifelse(!is.na(dist.peak2rRNA) & dist.peak2rRNA < maxdist,
"5S rRNA", "none")),
levels = c("tRNA", "5S rRNA", "none"))
# most tRNA and 5S rRNA genes are near "common (ubiquitous)" peaks:
table(peaktype[sel_peaksBin], near_ncRNA[sel_peaksBin])
##
## tRNA 5S rRNA none
## Common\nubiquitous 107 27 113
## Common\nfrequent 1 0 93
## Specific\npeaks 5 1 1148
# parameters for annotation legends and colors
annotLegendParams <- list(`%GC` = list(at = c(20, 40, 60),
labels_gp = gpar(fontsize = fs),
border = "gray10",
legend_direction = "horizontal",
legend_width = unit(18, "mm"),
title_position = "topcenter",
title_gp = gpar(fontsize = fl)),
`TSS dist.` = list(at = c(0, 4000, 8000),
labels_gp = gpar(fontsize = fs),
border = "gray10",
legend_direction = "horizontal",
legend_width = unit(18, "mm"),
title_position = "topcenter",
title_gp = gpar(fontsize = fl)),
`ncRNA` = list(labels_gp = gpar(fontsize = fs),
title_position = "topcenter",
title_gp = gpar(fontsize = fl)))
cols_ncRNA <- c("tRNA" = main_colors[5], "5S rRNA" = main_colors[3],
"none" = bg_color)
annotCols <- list(
`%GC` = colorRamp2(breaks = seq(min(peakgr$fracGC[sel_peaksBin]),
max(peakgr$fracGC[sel_peaksBin]),
length.out = 64) * 100,
colors = rev(hcl.colors(64, "Greens"))),
` width` = colorRamp2(breaks = seq(min(width(peakgr)[sel_peaksBin]),
max(width(peakgr)[sel_peaksBin]),
length.out = 64),
colors = rev(hcl.colors(64, "YlOrBr"))),
`TSS dist.` = colorRamp2(breaks = c(0, 10^seq(2, log10(max(peakgr$nrst_TSS_dist)),
length.out = 63)),
colors = hcl.colors(64)),
`ncRNA` = cols_ncRNA)
# prepare heatmap annotations
# `peakAnnot` for peaks (rows), left side
# `fracTFsAnnot` for peaks (rows), right side
# `sampleAnnot` for ChIP-seq experiments (columns), top
# `sampleLabels` for ChIP-seq experiments (columns), bottom
peakAnnot <- HeatmapAnnotation(
which = "row",
`%GC` = peakgr$fracGC[sel_peaksBin] * 100,
`TSS dist.` = peakgr$nrst_TSS_dist[sel_peaksBin],
`ncRNA` = near_ncRNA[sel_peaksBin],
col = annotCols,
width = unit(21, "mm"), show_legend = TRUE,
annotation_name_gp = gpar(fontsize = fl),
annotation_legend_param = annotLegendParams)
fracTFsAnnot <- HeatmapAnnotation(
which = "row",
`Fraction\nof TFs` = anno_barplot(
x = fracTFs[sel_peaksBin], gp = gpar(col = na_color),
border = FALSE, bar_width = 1.0,
axis_param = list(at = c(0, 0.4, 0.8),
gp = gpar(fontsize = fs)),
width = unit(10, "mm")),
annotation_name_gp = gpar(fontsize = fl))
sampleAnnot <- HeatmapAnnotation(
which = "column",
`Number of\nenriched\npeaks` = anno_barplot(
x = numpeaks,
gp = gpar(col = 0, fill = na_color),
border = FALSE, bar_width = 1.0,
axis_param = list(at = c(0, 150, 300),
gp = gpar(fontsize = fs),
facing = "outside",
direction = "normal"),
width = unit(10, "mm")),
annotation_name_gp = gpar(fontsize = fl),
annotation_name_side = "left", annotation_name_rot = 0,
show_legend = TRUE)
sampleLabels <- columnAnnotation(
TFnames = anno_mark(which = "column", side = "bottom",
at = match(selLabels, colnames(m)),
labels = sub("^[^_]+_", "", selLabels),
labels_gp = gpar(fontsize = fl)))
# create main heatmap with annotations
hm1 <- Heatmap(m[sel_peaksBin, ],
col = circlize::colorRamp2(
breaks = seq(0, max(m), length.out = 64),
colors = colorRampPalette(binary_heatmap_colors[c("FALSE", "TRUE")])(64)),
border = TRUE, border_gp = gpar(lwd = 0.5),
cluster_rows = TRUE, row_dend_width = unit(25, "mm"), show_row_dend = FALSE,
cluster_columns = TRUE, column_dend_height = unit(10, "mm"),
row_title_gp = gpar(fontsize = fl),
row_split = peaktype[sel_peaksBin], cluster_row_slices = FALSE,
left_annotation = peakAnnot,
right_annotation = fracTFsAnnot,
top_annotation = sampleAnnot,
bottom_annotation = sampleLabels,
show_row_names = FALSE, show_column_names = FALSE,
use_raster = TRUE, show_heatmap_legend = FALSE,
width = unit(w_peaksHm, "mm"), # heatmap body width
heatmap_height = unit(h_peaksHm, "mm")) # whole heatmap height
# draw the heatmap and grab as a grid graphics `grob`
# (needed for combining figure panels)
# use defined random number seed to make clustering deterministic
set.seed(42L)
hm_peaksBin <- grid.grabExpr(
hm1 <- draw(hm1, merge_legend = TRUE,
heatmap_legend_side = "bottom",
annotation_legend_side = "bottom"),
width = w_peaksHm, height = h_peaksHm
)
plot_grid(hm_peaksBin)
The following creates a heatmap similar to the peak-versus-experiment heatmap above, but using the ChIP-seq enrichment values (log2 IP/input) directly instead of binarized (TRUE, FALSE) values.
# use ChIP-seq enrichment matrix from this study (replicates averaged)
m <- peakenrAvg
stopifnot(all(selLabels %in% colnames(m)))
# to keep the same row order as in the binary heatmap `hm1`,
# we have to re-calculate the annotations and switch off the row clustering
sel_peaksEnr <- which(sel_peaksBin)[unlist(row_order(hm1), use.names = FALSE)]
# prepare annotation data
# `chip_enr_sel`: selected public ChIP-seq experiments
# (log2 IP/input enrichments)
# `chip_enr_sel_range`: value range for color scale (1% to 99%)
# `chip_enr_sel_range_full`: value range for color scale (0% to 100%)
chip_enr_sel <- chip_enr[sel_peaksEnr, ]
chip_enr_sel_range <- quantile(abs(chip_enr_sel), probs = c(0.95)) * c(-1, 1)
chip_enr_sel_range_full <- range(chip_enr_sel)
# parameters for annotation legends and colors
annotCols2 <- list(
`ncRNA` = cols_ncRNA,
H3 = colorRamp2(breaks = c(chip_enr_sel_range_full[1],
seq(chip_enr_sel_range[1],
chip_enr_sel_range[2],
length.out = 62),
chip_enr_sel_range_full[2]),
colors = viridisLite::magma(64)),
H3K14ac = colorRamp2(breaks = c(chip_enr_sel_range_full[1],
seq(chip_enr_sel_range[1],
chip_enr_sel_range[2],
length.out = 62),
chip_enr_sel_range_full[2]),
colors = viridisLite::magma(64)),
H3K9me2 = colorRamp2(breaks = c(chip_enr_sel_range_full[1],
seq(chip_enr_sel_range[1],
chip_enr_sel_range[2],
length.out = 62),
chip_enr_sel_range_full[2]),
colors = viridisLite::magma(64)),
H3K9me3 = colorRamp2(breaks = c(chip_enr_sel_range_full[1],
seq(chip_enr_sel_range[1],
chip_enr_sel_range[2],
length.out = 62),
chip_enr_sel_range_full[2]),
colors = viridisLite::magma(64)))
annotLegendParams2 <- list(
`ncRNA` = list(labels_gp = gpar(fontsize = fs),
title_position = "topcenter",
title_gp = gpar(fontsize = fl)),
H3 = list(at = c(-6, 0, 6),
title = "Histone log2 IP/input",
labels_gp = gpar(fontsize = fs),
border = "gray10",
legend_direction = "horizontal",
legend_width = unit(30, "mm"),
title_position = "topcenter",
title_gp = gpar(fontsize = fl)))
# prepare heatmap annotations
# `peakAnnot` for peaks (rows), left side
# `peakAnnot2` for peaks (rows), right side
peakAnnot <- HeatmapAnnotation(
which = "row",
`ncRNA` = near_ncRNA[sel_peaksEnr],
H3 = chip_enr_sel[, "GSE108668_H3"],
H3K14ac = chip_enr_sel[, "GSE108668_H3K14ac"],
H3K9me2 = chip_enr_sel[, "GSE182250_H3K9me2"],
H3K9me3 = chip_enr_sel[, "GSE182250_H3K9me3"],
col = annotCols2,
width = unit(28, "mm"),
annotation_name_gp = gpar(fontsize = fl),
annotation_legend_param = annotLegendParams2,
show_legend = c(TRUE, TRUE, FALSE, FALSE, FALSE))
atac_trunc <- pmin(atac_rpkm_avg[sel_peaksEnr], 2000) # cap ATAC signal at 2000
peakAnnot2 <- HeatmapAnnotation(
which = "row",
`Accessibility\n1e3 RPKM` =
anno_barplot(x = atac_trunc / 1e3,
gp = gpar(col = na_color),
border = FALSE, bar_width = 1.0,
baseline = 0,
axis_param = list(at = c(0, 1, 2),
gp = gpar(fontsize = fs)),
width = unit(10, "mm")),
annotation_name_gp = gpar(fontsize = fl))
# main heatmap value range for color scale
mx <- max(abs(m[sel_peaksEnr, ]))
qs <- quantile(abs(m[sel_peaksEnr, ]), .99)
# create main heatmap with annotations
hm2 <- Heatmap(m[sel_peaksEnr, ], name = "log2 IP/input",
col = circlize::colorRamp2(
breaks = c(-mx, seq(-qs, qs, length.out = 62), mx),
colors = colorRampPalette(enrichment_heatmap_colors)(64)),
cluster_rows = FALSE,
cluster_columns = column_dend(hm1), column_dend_height = unit(10, "mm"),
row_title_gp = gpar(fontsize = fl),
row_split = peaktype[sel_peaksEnr], cluster_row_slices = FALSE,
border = TRUE, border_gp = gpar(lwd = 0.5),
left_annotation = peakAnnot,
right_annotation = peakAnnot2,
top_annotation = sampleAnnot,
bottom_annotation = sampleLabels,
show_row_names = FALSE, show_column_names = FALSE,
use_raster = TRUE, show_heatmap_legend = TRUE,
heatmap_legend_param = list(at = round(c(-mx, 0, mx), 1),
labels_gp = gpar(fontsize = fs),
border = "gray10",
legend_direction = "horizontal",
legend_width = unit(30, "mm"),
title = "TF log2 IP/input",
title_position = "topcenter",
title_gp = gpar(fontsize = fl)),
width = unit(w_peaksHm, "mm"), # heatmap body width
heatmap_height = unit(h_peaksHm, "mm"))
# draw the heatmap and grab as a grid graphics `grob`
# (needed for combining figure panels)
# use defined random number seed to make clustering deterministic
set.seed(42L)
hm_peaksEnr <- grid.grabExpr(
hm2 <- draw(hm2, merge_legend = TRUE,
heatmap_legend_side = "bottom",
annotation_legend_side = "bottom"),
width = w_peaksHm, height = h_peaksHm
)
plot_grid(hm_peaksEnr)
We want to illustrate ChIP and Input read densities for selected TFs around their gene locus.
gnmtracks1 <- plottracks(df = chip_genome_tab,
gns = c("atf1", "pcr1", "cdc10", "zas1", "reb1", "toe3"),
cols = c(atf1 = main_colors[3], pcr1 = main_colors[2],
cdc10 = main_colors[1], zas1 = "#66A61E",
reb1 = main_colors[5], toe3 = main_colors[4]))
gnmtracks1
gnmtracks2 <- plottracks(df = chip_genome_tab,
gns = c("atf1", "pcr1", "cdc10", "zas1", "reb1", "toe3",
"fkh2", "thi5", "SPCC320.03", "SPCC777.02", "prz1"),
cols = c(atf1 = main_colors[3], pcr1 = main_colors[2],
cdc10 = main_colors[1], zas1 = "#66A61E",
reb1 = main_colors[5], toe3 = main_colors[4],
fkh2 = main_colors[3], thi5 = main_colors[2],
prz1 = main_colors[5], `SPCC320.03` = "#66A61E",
`SPCC777.02` = main_colors[1]),
font_size = 9)
gnmtracks2
The following creates a heatmap of pairwise motif similarities (motifsim), hierarchically clustered and grouped into n_split clusters. Specific clusters are are highlighted on the right and motifs in these clusters are labeled.
# define heatmap colors
cols <- hcl.colors(64, "viridis")
# use squared pairwise motif similarities without diagonal entries (self)
motifsim2 <- motifsim^2
diag(motifsim2) <- NA
# value range for color scale
rng <- range(motifsim2, na.rm = TRUE)
# hierarchical clustering of motifs (cut tree to produce `n_split` clusters)
motifcl <- hclust(dist(motifsim2, method = "euclidean"), method = "complete")
n_split <- 17
motifcl_num <- cutree(motifcl, k = n_split)
motifcl <- dendextend::rotate(as.dendrogram(motifcl),
c(motifcl_num[motifcl_num != 3],
motifcl_num[motifcl_num == 3]))
# prepare heatmap annotation data
# `selMotifCol`: colors for highlighted motif clusters
# `selMotifIndex`: row index of motifs in highlighted clusters
# `nMotifs`: number of motifs identified per transcription factor
selMotifCol <- c("0" = bg_color, "3" = "black", "13" = "black", "16" = "black")
selMotifIndex <- which(motifcl_num[rownames(motifsim2)] %in% names(selMotifCol))
nMotifs <- unclass(table(sub("[.]m[0-9]+$", "", rownames(motifsim2))))[
sub("[.]m[0-9]+$", "", rownames(motifsim))
]
# prepare heatmap annotations
# `motifAnnot` for motifs (columns), top
# `selMotifAnnot` for motif (rows), right side
motifAnnot <- HeatmapAnnotation(
"Number of\nmotifs per TF" = nMotifs,
col = list(
"Number of\nmotifs per TF" = circlize::colorRamp2(
colors = colorRampPalette(c("white", main_colors[4]))(max(nMotifs)),
breaks = seq.int(max(nMotifs)))
),
show_legend = TRUE, show_annotation_name = TRUE,
annotation_name_side = "right",
annotation_name_gp = gpar(fontsize = fl),
annotation_legend_param = list(at = seq.int(max(nMotifs)),
labels_gp = gpar(fontsize = fl),
border = "gray10",
legend_direction = "horizontal",
legend_width = unit(18, "mm"),
title_position = "lefttop",
title_gp = gpar(fontsize = fl)))
selMotifAnnot <- rowAnnotation(
highlight = ifelse(motifcl_num %in% names(selMotifCol),
as.character(motifcl_num), "0"),
motifnames = anno_mark(which = "row", side = "right",
at = selMotifIndex,
labels = rownames(motifsim2)[selMotifIndex],
labels_gp = gpar(fontsize = fl)),
col = list(highlight = selMotifCol),
show_legend = FALSE, show_annotation_name = FALSE)
# create main heatmap with annotations
hm4 <- Heatmap(matrix = motifsim2, name = "Motif\nsimilarity",
col = circlize::colorRamp2(breaks = seq(rng[1], rng[2],
length.out = 64),
colors = cols),
cluster_rows = motifcl,
show_row_dend = TRUE, row_dend_width = unit(10, "mm"),
cluster_columns = motifcl, show_column_dend = FALSE,
row_split = n_split, column_split = n_split,
column_title = " ",
row_gap = unit(0.2, "mm"), column_gap = unit(0.2, "mm"),
show_row_names = FALSE,
column_title_gp = gpar(fontsize = fl),
row_title = " ",
show_column_names = FALSE,
top_annotation = motifAnnot,
right_annotation = selMotifAnnot,
heatmap_legend_param = list(
at = round(c(rng[1], mean(rng), rng[2]), 2),
labels_gp = gpar(fontsize = fl),
border = "gray10",
legend_direction = "horizontal",
legend_width = unit(18, "mm"),
title_position = "lefttop",
title_gp = gpar(fontsize = fl)),
width = unit(w_peaksHm, "mm"), # heatmap body width
height = unit(w_peaksHm, "mm")) # heatmap body height
# draw the heatmap and grab as a grid graphics `grob`
# (needed for combining figure panels)
# use defined random number seed to make clustering deterministic
set.seed(42L)
hm_motifs_small <- grid.grabExpr(
hm4 <- draw(hm4, merge_legend = TRUE,
heatmap_legend_side = "bottom",
annotation_legend_side = "bottom"),
width = w_peaksHm, height = w_peaksHm
)
plot_grid(hm_motifs_small)
The following creates aligned sequence logos for the motifs in the highlighted clusters from the motif similarity heatmap.
# `pL`: list of plots
pL <- list()
# for each highlighted motif cluster...
for (cl in setdiff(names(selMotifCol), c("0", "3"))) {
# exctract the motif matrices
selmotifs <- motifs$motif[match(rownames(motifsim2)[motifcl_num == cl], motifs$name)]
# align and visualize as sequence logos
pL[[cl]] <- universalmotif::view_motifs(
motifs = selmotifs,
method = "PCC", tryRC = TRUE,
min.overlap = 4,
min.mean.ic = 0.25,
normalise.scores = TRUE,
show.positions = FALSE,
names.pos = "right",
text.size = 10)
}
# for each motif row index, get the corresponding cluster
indexToClustername <- structure(rep(seq.int(n_split),
lengths(row_order(hm4))),
names = unlist(row_order(hm4)))
# `pL2`: list of plots similar to `pL` but with added empty plots for white space
pL2 <- rep(list(NULL), 2 * length(pL) + 1)
midx <- seq(2, length(pL2), by = 2)
pL2[midx] <- pL
relh_motifs <- c(0.4, rep(0.8, length(pL2) - 1))
relh_motifs[midx] <- lengths(
split(selMotifIndex, motifcl_num[selMotifIndex])[
setdiff(names(selMotifCol), c("0", "3"))
])
# combine the individual plots
motifs_aligned <- plot_grid(plotlist = pL2, align = "none", ncol = 1,
hjust = -0.1, vjust = 1.1,
label_size = 10,
rel_heights = relh_motifs)
print(motifs_aligned)
Create “upset” plots showing the overlap of transcription factor ChIP-seq peaks among the factors in a highlighted motif cluster.
# select specific and common-frequent peaks only
peakgrSel <- peakgr[peakgr$peaktype %in% c("Specific peaks", "Common (frequent)")]
# `pL3`: list of individual upset plots
pL3 <- list()
# for each highlighted motif cluster...
for (cl in setdiff(names(selMotifCol), c("0", "3"))) {
motifnms <- rownames(motifsim2)[motifcl_num == cl]
cltfs <- unique(sub("[.]m[0-9]+$", "", motifnms))
ovL <- as.list(mcols(peakgrSel)[, paste0("is_enr_in.", cltfs)])
ovL <- lapply(ovL, function(i) names(peakgrSel)[i])
names(ovL) <- sub("^is_enr_in.", "", names(ovL))
nm <- unique(indexToClustername[as.character(which(motifcl_num == cl))])
pd <- stack(ovL) |>
left_join(y = data.frame(values = names(peakgr),
type = peakgr$peaktype),
join_by(values)) |>
group_by(values) |>
summarise(ind = list(ind),
type = unique(type))
pL3[[paste0("cluster ", nm)]] <-
ggplot(pd, aes(x = ind, fill = type)) +
geom_bar() +
scale_x_upset() +
scale_fill_manual(values = peakset_colors) +
labs(x = element_blank(),
y = "Number of\npeaks",
fill = element_blank()) +
theme_cowplot(fm) +
theme(axis.ticks.x = element_blank(),
legend.position = ifelse(cl == "16", "inside", "none"),
legend.position.inside = c(0.95, 0.95),
legend.justification.inside = c(1, 1)) +
theme_combmatrix(combmatrix.label.text = element_text(colour = "black",
size = fm),
combmatrix.panel.point.size = 2.0,
combmatrix.panel.line.size = 0.8,
combmatrix.label.extra_spacing = 5,
combmatrix.label.make_space = TRUE,
combmatrix.label.width = unit(12, "mm"),
plot.margin = unit(c(7, 7, 1, 7), "points")) # top, right, bottom, left
}
# `pL4`: list of plots similar to `pL3` but with added empty plots for white space
pL4 <- rep(list(NULL), 2 * length(pL) + 1)
pL4[midx] <- pL3
relh_upsets <- relh_motifs
relh_upsets[midx] <- relh_upsets[midx] + relh_upsets[midx + 1] - 0.01
relh_upsets[midx + 1] <- 0.01
# combine the individual plots
upsets_aligned <- plot_grid(plotlist = pL4, align = "hv", axis = "l", ncol = 1,
rel_heights = relh_upsets)
print(upsets_aligned)
We want to know which transcription factors are binding their own promoter regions (1kb windows centered on transcription start sites) and thus are possibly regulating their own transcription.
For visualization, we classify known transcription factors (rows) into three groups (the ones that do or do not bind their own promoter, and the ones that were not ChIP’ed in this study) and indicate in a binary heatmap the ChIP-seq experiments (columns) that showed enrichment at the promoter.
# prepare data
# ... transcription factor genes in ChIP-seq experiments (columns)
tfnms <- colnames(mcols(peakgr))[grep("^is_enr_in[.]", colnames(mcols(peakgr)))]
tfnms <- sub("^is_enr_in[.]", "", tfnms)
tfnms <- ifelse(grepl("^SP", tfnms), tfnms, .lowercase(tfnms))
# ... transcription factors in `txannot` (rows)
tfnms_all <- txannot$unique_einprot_id[match(dbd$PomBaseID, txannot$gene_id)]
tfnms_all <- tfnms_all[order(match(tfnms_all, tfnms))] # order as in `tfnms`
tx_sel <- ifelse(tfnms_all %in% txannot$gene_id,
match(tfnms_all, txannot$gene_id),
match(tfnms_all, txannot$gene_name))
stopifnot(all(!duplicated(txannot[tx_sel, "gene_id"]))) # exactly one transcript per gene
# ... transcription factors that were ChIP'ed in this study
tfnms_chip <- tfnms_all[txannot[tx_sel, "ChIP.this.study"]]
# ... create promoter/terminator binding matrix
mprom <- do.call(rbind, lapply(tx_sel, function(i) {
tfnms_i <- strsplit(x = txannot[i, "prom.enriched.TFs"], split = ";")[[1]]
as.numeric(tfnms %in% ifelse(grepl("^SP", tfnms_i), tfnms_i, .lowercase(tfnms_i)))
}))
dimnames(mprom) <- list(tfnms_all, tfnms)
stopifnot(all.equal(sum(txannot$binds.own.promoter),
sum(diag(mprom) > 0)))
diag(mprom[tfnms_chip, tfnms_chip]) <- ifelse(diag(mprom[tfnms_chip, tfnms_chip]) > 0,
2, diag(mprom[tfnms_chip, tfnms_chip]))
# generate binary promoter binding heatmap
# ... label autoregulatory TFs
selLabels <- tfnms_chip[diag(mprom[tfnms_chip, tfnms_chip]) > 0]
stopifnot(all(selLabels %in% rownames(mprom)))
selLabelsPlot <- selLabels
selLabelsPlot[match("zas1", selLabels)] <- "zas1*"
rowLabels <- rowAnnotation(
TFnames = anno_mark(which = "row", side = "right",
at = match(selLabels, rownames(mprom)),
labels = selLabelsPlot,
labels_gp = gpar(fontsize = fl)))
# ... group TFs by promoter type
promtype <- factor(ifelse(txannot[tx_sel, "ChIP.this.study"],
ifelse(rowSums(mprom) > 0,
"Bound\nTF promoters", "Non-bound\nTF promoters"),
"Not\nChIP'ed"),
levels = c("Bound\nTF promoters", "Non-bound\nTF promoters",
"Not\nChIP'ed"))
# ... order rows according to `promtype` and auto-regulation
is_auto <- function(nm, x = mprom) {
d <- diag(x[match(nm, rownames(x)),
match(nm, colnames(x))])
!is.na(d) & d > 0
}
o <- order(c(3, 2, 1)[as.numeric(promtype)] * 100 +
is_auto(rownames(mprom), mprom) * 20 +
rowSums(mprom), decreasing = TRUE)
oc <- match(intersect(rownames(mprom)[o], colnames(mprom)), colnames(mprom))
# ... create main heatmap with annotations
hm5 <- Heatmap(mprom[o, oc], name = "hm5",
col = circlize::colorRamp2(
breaks = seq(0, 2, length.out = 64),
colors = colorRampPalette(c(binary_heatmap_colors["FALSE"],
main_colors[1],
binary_heatmap_colors["TRUE"]))(64)),
border = TRUE, border_gp = gpar(lwd = 0.5),
cluster_rows = FALSE, show_row_dend = FALSE,
cluster_columns = FALSE, show_column_dend = FALSE,
row_split = promtype[o], row_title_gp = gpar(fontsize = fl),
column_title = "TF ChIP-seq experiments",
column_title_gp = gpar(fontsize = fl),
show_row_names = FALSE, show_column_names = FALSE,
right_annotation = rowLabels[o],
use_raster = TRUE, show_heatmap_legend = FALSE,
width = unit(w_peaksHm, "mm"), # heatmap body width
height = unit(w_peaksHm, "mm"))
hm_prom <- grid.grabExpr(
{
hm5 <- draw(hm5, merge_legend = TRUE,
heatmap_legend_side = "bottom",
annotation_legend_side = "bottom")
decorate_heatmap_body("hm5", {
grid.lines(c(mean(grepl("^Bound", promtype[match(tfnms, tfnms_all)])), 0),
c(0, 1), gp = gpar(lty = 2, lwd = 0.5))
}, slice = 1)
decorate_heatmap_body("hm5", {
grid.lines(c(1, mean(grepl("^Bound", promtype[match(tfnms, tfnms_all)]))),
c(0, 1), gp = gpar(lty = 2, lwd = 0.5))
}, slice = 2)
},
width = w_peaksHm, height = w_peaksHm
)
plot_grid(hm_prom)
pd <- peakenr |> as.data.frame() |>
tibble::rownames_to_column("peakid") |>
pivot_longer(col = !matches("peakid")) |>
mutate(group = sub("_rep[12]$", "", name),
name = factor(name, levels = unique(name)))
ylims <- c(-2.0, 4.0)
yticks <- c(-2, 0, 2, 4)
pL <- lapply(levels(fct_relevel(sort(unique(pd$group)), "Untagged", after = Inf)),
function(grp1) {
pd1 <- pd[pd$group == grp1, ] |> pivot_wider()
pd1$cols <- densCols(x = pd1[[paste0(grp1, "_rep1")]],
y = pd1[[paste0(grp1, "_rep2")]],
nbin = 64, colramp = colorRampPalette(hcl.colors(32)))
ggplot(pd1, aes(.data[[paste0(grp1, "_rep1")]],
.data[[paste0(grp1, "_rep2")]])) +
geom_abline(intercept = 0, slope = 1, linetype = 1, color = "gray") +
geom_hline(yintercept = 0, linetype = 2, color = "gray") +
geom_vline(xintercept = 0, linetype = 2, color = "gray") +
geom_scattermost(xy = as.data.frame(pd1[, paste0(grp1, c("_rep1", "_rep2"))]),
pointsize = 2, color = pd1$cols, pixels = c(256, 256)) +
coord_fixed(xlim = ylims, ylim = ylims, expand = FALSE, clip = "off") +
scale_x_continuous(breaks = yticks) +
scale_y_continuous(breaks = yticks) +
theme_cowplot(7) +
labs(x = element_blank(), y = element_blank()) +
annotate("text", x = -Inf, y = Inf, hjust = -0.05, vjust = 1.05,
label = grp1, color = "black", size = 3) +
theme(legend.position = "none")
})
(gg_enrpairs <- plot_grid(plotlist = pL, nrow = 9, ncol = 9))
The following barplots show the numbers of ChIP-seq peaks that are near a tRNA or 5S rRNA gene (top panel), or the numbers of tRNA/5S rRNA genes that are distal or near a ChIP-seq peak (bottom panel).
# prepare `data.frame`s with plotting data
df_peaks <- data.frame(peaktype = peaktype[sel_peaksEnr],
near_ncRNA = near_ncRNA[sel_peaksEnr])
df_ncRNA <- data.frame(ncRNAtype = factor(
rep(c("tRNA", "5S rRNA"), c(sum(is_tRNA), sum(is_rRNA))),
levels = c("tRNA", "5S rRNA")),
near_peak = !is.na(c(dist.tRNA2peak, dist.rRNA2peak)) &
c(dist.tRNA2peak, dist.rRNA2peak) < maxdist)
df_ncRNA$group <- factor(paste0(df_ncRNA$ncRNAtype, " ",
c("TRUE" = "near", "FALSE" = "distal")[as.character(df_ncRNA$near_peak)]),
levels = c("tRNA distal","tRNA near",
"5S rRNA distal", "5S rRNA near"))
# generate plots
# ... numbers of peaks near genes
p_ncRNA1 <- ggplot(df_peaks, aes(peaktype, fill = near_ncRNA)) +
geom_bar() +
scale_fill_manual(values = cols_ncRNA) +
labs(x = element_blank(), y = "Number of peaks", fill = "Near ncRNA:") +
theme_cowplot(8) +
theme(legend.position = "inside",
legend.position.inside = c(0.05, 0.95),
legend.justification = c(0, 1))
# ... numbers of genes near peaks
p_ncRNA2 <- ggplot(df_ncRNA, aes(ncRNAtype, fill = group)) +
geom_bar() +
scale_fill_manual(values = c(`tRNA near` = cols_ncRNA[["tRNA"]],
`tRNA distal` = lighten(cols_ncRNA["tRNA"], 0.2),
`5S rRNA near` = cols_ncRNA[["5S rRNA"]],
`5S rRNA distal` = lighten(cols_ncRNA["5S rRNA"], 0.2))) +
labs(x = element_blank(), y = "Number of genes", fill = "Relative\npeak location:") +
theme_cowplot(8) +
theme(legend.position = "inside",
legend.position.inside = c(0.95, 0.95),
legend.justification = c(1, 1))
# ... combine the two panels
(p_ncRNA <- plot_grid(p_ncRNA1, p_ncRNA2, align = "v", nrow = 2))
sce150untag <- readRDS(params$ipms150untag)
res150untag <- .getTestCols(sce150untag, adjp_cutoff = 0.01, logfc_cutoff = 1)
## Loading required package: SingleCellExperiment
tstat <- res150untag$tstat
colnames(tstat) <- .getProteinNameFromComparison(colnames(tstat))
tstat <- tstat[rownames(tstat) %in% colnames(tstat), ]
tstat[is.na(tstat)] <- 0
dim(tstat)
## [1] 85 89
hot_tfs <- c("Php3", "Sak1", "Pcr1", "Prr1", "Atf1", "Rst2", "Adn2",
"Adn3", "Hsr1", "Phx1", "Pho7")
stopifnot(all(hot_tfs %in% rownames(tstat)))
stopifnot(all(hot_tfs %in% colnames(tstat)))
stopifnot(all(rownames(tstat) %in% colnames(tstat)))
tstat <- tstat[c(hot_tfs, setdiff(rownames(tstat), hot_tfs)), ]
tstat <- tstat[, c(rownames(tstat), setdiff(colnames(tstat), rownames(tstat)))]
hmipmshot <- Heatmap(
tstat,
col = circlize::colorRamp2(
breaks = seq(3.0, 13.0, length.out = 64),
colors = colorRampPalette(binary_heatmap_colors[c("FALSE", "TRUE")])(64)),
border = TRUE,
row_split = ifelse(rownames(tstat) %in% hot_tfs, "HOT TFs", "Other TFs"),
column_split = ifelse(colnames(tstat) %in% hot_tfs, "HOT TFs", "Other TFs"),
cluster_rows = FALSE, cluster_columns = FALSE,
row_title_gp = gpar(fontsize = fl), row_names_gp = gpar(fontsize = fs),
column_title_gp = gpar(fontsize = fl), show_column_names = FALSE,
heatmap_legend_param = list(labels_gp = gpar(fontsize = fs),
border = "gray10",
legend_direction = "horizontal",
legend_width = unit(24, "mm"),
title_position = "lefttop",
title_gp = gpar(fontsize = fl)),
na_col = bg_color,
name = "Mod. t-stat.")
hm_ipms_hot <- grid.grabExpr(
hmipmshot <- draw(hmipmshot, merge_legend = TRUE,
heatmap_legend_side = "bottom")
)
plot_grid(hm_ipms_hot)
Assemble the panels into Figure 2:
cowplot::plot_grid(
hm_peaksBin,
hm_peaksEnr,
hm_prom,
cowplot::plot_grid(gnmtracks1, scale = 0.9),
hm_motifs_small,
cowplot::plot_grid(
motifs_aligned, upsets_aligned,
nrow = 1, rel_widths = c(1.25, 1)),
nrow = 3,
labels = c("A", "B", "C", "D", "E", "F"),
align = "vh",
axis = "b",
rel_widths = c(1, 1),
rel_heights = c(h_peaksHm + 10, w_peaksHm + 10, w_peaksHm + 20)
)
Assemble the panels into Supplementary Figure 2:
cowplot::plot_grid(
cowplot::plot_grid(gg_enrpairs, scale = 0.9, labels = "A"),
cowplot::plot_grid(NULL, NULL, NULL, # space for labels
nrow = 1, labels = c("B", "C", "D"),
rel_widths = c(130, 100, 100)),
cowplot::plot_grid(
hm_ipms_hot,
cowplot::plot_grid(gnmtracks2, NULL, ncol = 1, rel_heights = c(12, 1)),
p_fracbound,
nrow = 1,
labels = c("", "", ""),
align = "v",
axis = "b",
rel_widths = c(130, 100, 100),
rel_heights = c(180)
),
nrow = 3,
rel_widths = c(1),
rel_heights = c(12, 0.25, 9.75)
)
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS/LAPACK: FlexiBLAS OPENBLAS; LAPACK version 3.10.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Zurich
## tzcode source: system (glibc)
##
## attached base packages:
## [1] parallel stats4 grid stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] SingleCellExperiment_1.24.0 lubridate_1.9.3
## [3] stringr_1.5.1 purrr_1.0.2
## [5] readr_2.1.5 tidyverse_2.0.0
## [7] forcats_1.0.0 kableExtra_1.4.0
## [9] dendextend_1.17.1 jsonlite_1.8.8
## [11] SummarizedExperiment_1.32.0 Biobase_2.62.0
## [13] MatrixGenerics_1.14.0 matrixStats_1.3.0
## [15] viridisLite_0.4.2 dplyr_1.1.4
## [17] ggupset_0.3.0 scattermore_1.2
## [19] colorspace_2.1-0 universalmotif_1.20.0
## [21] tidyr_1.3.1 tibble_3.2.1
## [23] BSgenome_1.70.2 rtracklayer_1.62.0
## [25] BiocIO_1.12.0 GenomicRanges_1.54.1
## [27] Biostrings_2.70.3 GenomeInfoDb_1.38.8
## [29] XVector_0.42.0 IRanges_2.36.0
## [31] S4Vectors_0.40.2 BiocGenerics_0.48.1
## [33] cowplot_1.1.3 ggplot2_3.5.0
## [35] ComplexHeatmap_2.18.0 circlize_0.4.16
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-7 gridExtra_2.3 rlang_1.1.3
## [4] magrittr_2.0.3 clue_0.3-65 GetoptLong_1.0.5
## [7] compiler_4.3.2 png_0.1-8 systemfonts_1.0.6
## [10] vctrs_0.6.5 pkgconfig_2.0.3 shape_1.4.6.1
## [13] crayon_1.5.2 fastmap_1.1.1 magick_2.8.3
## [16] labeling_0.4.3 utf8_1.2.4 Rsamtools_2.18.0
## [19] rmarkdown_2.26 tzdb_0.4.0 xfun_0.43
## [22] zlibbioc_1.48.2 cachem_1.0.8 highr_0.10
## [25] DelayedArray_0.28.0 BiocParallel_1.36.0 cluster_2.1.4
## [28] R6_2.5.1 bslib_0.7.0 stringi_1.8.3
## [31] RColorBrewer_1.1-3 jquerylib_0.1.4 Rcpp_1.0.12
## [34] iterators_1.0.14 knitr_1.46 timechange_0.3.0
## [37] Matrix_1.6-5 tidyselect_1.2.1 rstudioapi_0.16.0
## [40] abind_1.4-5 yaml_2.3.8 viridis_0.6.5
## [43] doParallel_1.0.17 codetools_0.2-19 lattice_0.22-6
## [46] withr_3.0.0 evaluate_0.23 xml2_1.3.6
## [49] pillar_1.9.0 KernSmooth_2.23-22 foreach_1.5.2
## [52] generics_0.1.3 RCurl_1.98-1.14 hms_1.1.3
## [55] munsell_0.5.1 scales_1.3.0 glue_1.7.0
## [58] tools_4.3.2 GenomicAlignments_1.38.2 XML_3.99-0.16.1
## [61] Cairo_1.6-2 GenomeInfoDbData_1.2.11 restfulr_0.0.15
## [64] cli_3.6.2 fansi_1.0.6 S4Arrays_1.2.1
## [67] svglite_2.1.3 gtable_0.3.5 sass_0.4.9
## [70] digest_0.6.35 SparseArray_1.2.4 rjson_0.2.21
## [73] farver_2.1.1 htmltools_0.5.8.1 lifecycle_1.0.4
## [76] GlobalOptions_0.1.2 MASS_7.3-60